/*******************************************************************************
Project:        Wealth -- Smith, Zidar, and Zwick
Last modified:  2021-10-1
Description:    This file retrieves inputs and outputs for our classical minimum
                distance exercises.
*******************************************************************************/

/*******************************************************************************
    (1) Retrieve CMD coefficients so that later we can calculate pre-1989 rates 
        based on the Moody's Baa and US Treasury 5-year rates.
*******************************************************************************/

// For each top/bottom pair, read the CMD coefficient file, keep its first
// column (four rows), and copy the four values into globals named
// coef_<group>_ln_rI and coef_<group>_ln_rC. Globals are used because the data
// in memory are replaced before the coefficients are needed in section (4).
foreach topbottom in "top01 bot99pt9" "top1 bot99" { 

    local top : word 1 of `topbottom' 
    local bot : word 2 of `topbottom' 

    import delim "$dboxroot/cmd_fr/csv/2021-07-15/`top'-niw-ust5-baa-btstrp200/coef_ses4.txt", clear

    // Only v1 is used; the remaining columns are discarded.
    keep v1
    assert _N == 4

    // Transpose the four rows into four columns. promote keeps the values in
    // double precision when v1 is a double; without it xpose stores every
    // transposed variable as float and silently truncates the coefficients.
    xpose, clear promote

    // Row order in the file maps to: top ln_rI, top ln_rC, bot ln_rI, bot ln_rC.
    rename (v1-v4) (coef_`top'_ln_rI coef_`top'_ln_rC coef_`bot'_ln_rI coef_`bot'_ln_rC)

    assert _N == 1

    foreach column of varlist _all {
        // Stop here, with a clear location, if a coefficient failed to parse,
        // instead of propagating a missing value into the rate calculation.
        assert !missing(`column')
        global `column' = `column'[1]
    }
}

/*******************************************************************************
    (2) Retrieve interest income of the top 1%, top 0.1%, bottom 99%, and bottom
        99.9% from CMD inputs files; also retrieve logged Moody's Baa and UST 5 
        rates for calculating pre-1989 group rates.
*******************************************************************************/

// For each top/bottom pair, read the CMD inputs file and save a year-level
// tempfile (flows_<top><bot>) holding the two groups' flows (y_<top>, y_<bot>)
// plus ln_r_baa and ln_r_ust5.
foreach topbottom in "top01 bot99pt9" "top1 bot99" {

    local top : word 1 of `topbottom' 
    local bot : word 2 of `topbottom' 

    // The path is quoted so that a $dtadir containing spaces still resolves,
    // in line with the quoted paths used for the other imports in this file.
    import delimited "$dtadir/cmdinputs_`top'`bot'_20210615.csv", clear

    // y1_niwrank is renamed to the top group, y2_niwrank to the bottom group.
    keep year y1_niwrank y2_niwrank ln_r_baa ln_r_ust5
    rename (y1_niwrank y2_niwrank) (y_`top' y_`bot')

    // The logged rates must be present in every year; the flows must be
    // missing after 2016, and those years are dropped.
    assert !missing(ln_r_baa) & !missing(ln_r_ust5)
    assert missing(y_`top') & missing(y_`bot') if year > 2016
    drop if year > 2016

    // Every remaining year must have both flows.
    assert !missing(y_`top') & !missing(y_`bot')

    tempfile flows_`top'`bot'
    save `flows_`top'`bot''
}

/*******************************************************************************
    (3) Retrieve interest rates of groups from (2) after implementing CMD 
        exercise
*******************************************************************************/

// For each top/bottom pair, read r4.txt from the CMD output folder, name its
// first nine columns, and save them as the tempfile rates_<top><bot>.
foreach pair in "top01 bot99pt9" "top1 bot99" {

    local top = word("`pair'", 1)
    local bot = word("`pair'", 2)

    local cmddir "$dboxroot/cmd_fr/csv/2021-07-15/`top'-niw-ust5-baa-btstrp200"
    import delimited "`cmddir'/r4.txt", clear

    // Columns beyond the ninth are not used.
    keep v1-v9

    // Column order: year, point estimates, bounds for the top group, bounds
    // for the bottom group, then the two standard errors.
    local newnames year r_`top' r_`bot'
    local newnames `newnames' ub_r_`top' lb_r_`top' ub_r_`bot' lb_r_`bot'
    local newnames `newnames' se_r_`top' se_r_`bot'
    rename (v1-v9) (`newnames')

    // missing() with several arguments is true if any one of them is missing,
    // so this requires all rates and bounds to be present in every row.
    assert !missing(r_`top', r_`bot', ub_r_`top', ub_r_`bot', lb_r_`top', lb_r_`bot')

    tempfile rates_`top'`bot'
    save `rates_`top'`bot''
}

/*******************************************************************************
    (3b) Merge the flow files from (2) with the rate files from (3), by year
*******************************************************************************/

// Start from the top 0.1% / bottom 99.9% flows and add the top 1% / bottom 99%
// flows; every year must appear in both files.
use `flows_top01bot99pt9', clear
merge 1:1 year using `flows_top1bot99', assert(match) nogenerate

// Attach each set of rates. Years before 1989 must be absent from the rates
// file (master only); 1989 onward must be matched.
foreach pair in top01bot99pt9 top1bot99 {
    merge 1:1 year using `rates_`pair'', assert(master match)
    assert _merge == cond(year < 1989, 1, 3)
    drop _merge
}

/*******************************************************************************
    (4) Use coefficients from (1), Moody's Baa, and UST 5-year rates to 
        calculate pre-1989 rates
*******************************************************************************/

// For each group, build the fitted rate from the stored coefficients and the
// logged Baa / UST 5 rates, confirm that it reproduces the rate already in the
// data from 1989 on, then use it to fill the years before 1989.
foreach g in top1 top01 bot99 bot99pt9 {

    // The rC coefficient multiplies the Baa rate, rI the UST 5 rate.
    local b_baa  ${coef_`g'_ln_rC}
    local b_ust5 ${coef_`g'_ln_rI}

    gen ln_r_`g'_extrplt = `b_baa' * ln_r_baa + `b_ust5' * ln_r_ust5
    gen r_`g'_extrplt = exp(ln_r_`g'_extrplt)

    // From 1989 on the fitted and stored rates must agree to within 1E-3.
    // A missing gap counts as a failure.
    tempvar gap
    gen `gap' = abs(r_`g'_extrplt - r_`g') if year >= 1989
    assert `gap' <= 1E-3 if year >= 1989
    drop `gap'

    // Fill the earlier years and require a complete series.
    replace r_`g' = r_`g'_extrplt if year < 1989
    assert !missing(r_`g')
}

// The fitted series and the logged input rates are no longer needed.
drop *_extrplt ln_r_*

/*******************************************************************************
    (5) Use 1989 SEs for pre-1989 years to construct upper and lower bounds
*******************************************************************************/

// Back out the multiplier linking the SE to the upper bound, using the top 1%
// series: the mean of (upper bound - rate) / SE over the rows where it is
// defined.
tempvar tratio
generate `tratio' = (ub_r_top1 - r_top1) / se_r_top1
quietly summarize `tratio', meanonly
local crit = `r(mean)'
drop `tratio'

// Locate the 1989 row: after sorting, it is the last row with year <= 1989.
sort year
quietly count if year <= 1989
local idx89 = `r(N)'
assert year[`idx89'] == 1989

// Before 1989: carry the 1989 SE back, then rebuild both bounds around the
// rate using the multiplier computed above.
foreach g in top1 top01 bot99 bot99pt9 {
    replace se_r_`g' = se_r_`g'[`idx89'] if year < 1989
    replace ub_r_`g' = r_`g' + (`crit' * se_r_`g') if year < 1989
    replace lb_r_`g' = r_`g' - (`crit' * se_r_`g') if year < 1989

    // The SE and both bounds must now be complete for every year.
    assert !missing(se_r_`g', ub_r_`g', lb_r_`g')
}

/*******************************************************************************
    (6) Calculate additional rate using scaling factor from Owen's 
        2021-06-16-cmd_3tier note (equation 5).
*******************************************************************************/

// Scaling factor applied to the top 1% rate to obtain the P99-99.9 rate.
// It is held in a temporary variable because it is not part of the output.
tempvar ratio
generate `ratio' = (y_top1 - y_top01) / (y_top1 - ((r_top1 / r_top01) * y_top01))

// Sanity bound on the factor; a missing value also fails this check.
assert `ratio' <= 1.056

generate r_p99_99pt9 = r_top1 * `ratio'
drop `ratio'

/*******************************************************************************
    (7) Check output, clean up
*******************************************************************************/

// Nesting checks: the top 1% flow exceeds the top 0.1% flow, and the bottom
// 99.9% flow exceeds the bottom 99% flow.
assert y_top1 > y_top01
assert y_bot99pt9 > y_bot99

// Group descriptions used in the variable labels.
local lbl_top01    "top 0.1%"
local lbl_top1     "top 1%"
local lbl_bot99pt9 "bottom 99.9%"
local lbl_bot99    "bottom 99%"

// Label the flow, rate, SE, and bound variables of each group.
foreach g in top01 top1 bot99pt9 bot99 {
    local who "`lbl_`g'' NIW"

    label variable y_`g'    "fiint + intest (trillions), `who'"
    label variable r_`g'    "CMD interest rate, `who'"
    label variable se_r_`g' "Bootstrap SE: CMD interest rate, `who'"
    label variable ub_r_`g' "Upper bound: CMD interest rate, `who'"
    label variable lb_r_`g' "Lower bound: CMD interest rate, `who'"
}

label variable r_p99_99pt9 "CMD-derived interest rate, P99-99.9 by NIW"

/*******************************************************************************
    (8) Save output
*******************************************************************************/

// Write the final year-sorted file. The path is quoted so that an $outdir
// containing spaces does not split the filename into separate arguments.
sort year
save "$outdir/cmd_flows_rates_niwranks20210715.dta", replace
